<?php
declare (strict_types=1);

namespace app\common\library\media;

use GuzzleHttp\Client;
use GuzzleHttp\Exception\RequestException;
use QL\QueryList;
use QL\Ext\PhantomJs;

/**
 * Fetches a page (or JSON endpoint) through QueryList and extracts a list of
 * items from it.
 *
 * Recognised configuration keys (all optional): `method`, `timeout`, `async`,
 * `rules`, `range`, `accept`.
 *
 * - accept = 'html': `rules` and `range` are passed straight to QueryList.
 * - accept = 'json': `range` is a `|`-separated key path into the decoded
 *   document and `rules` maps each output field (title, cover, url, time,
 *   extra) to the key holding it in every row.
 */
class Crawler
{
    /** @var Client Guzzle client created in the constructor; not used by this class's own methods. */
    protected $client;

    /** @var string 'linux' when PHP_OS is exactly 'Linux', otherwise 'windows'. */
    protected $os;

    /** @var QueryList Shared QueryList instance (QueryList::getInstance()). */
    protected $ql;

    /** @var bool When truthy, pages are rendered through the PhantomJS plugin. */
    protected $async = false;

    /** @var string HTTP method; GET and POST are supported for non-async requests. */
    protected $method = 'GET';

    /** @var int Request timeout in seconds. */
    protected $timeout = 5;

    /** @var array Extraction rules (shape depends on `accept`, see class doc). */
    protected $rules = [];

    /** @var string|null QueryList range selector, or `|`-separated JSON key path. */
    protected $range;

    /** @var string Response format: 'html' or 'json'. */
    protected $accept = 'html';

    /**
     * @var string|null URL of the most recent getWebsiteHtml() call.
     *
     * Public because it used to be created as a dynamic (hence public) property.
     */
    public $url;

    /**
     * @var array Headers sent by the most recent getWebsiteHtml() call.
     *
     * Public because it used to be created as a dynamic (hence public) property.
     */
    public $headers = [];

    /**
     * @param array $config Optional overrides for method, timeout, async,
     *                      rules, range and accept.
     */
    public function __construct($config)
    {
        $this->client = new Client();

        $this->os = PHP_OS === 'Linux' ? 'linux' : 'windows';
        $this->method = $config['method'] ?? $this->method;
        $this->timeout = $config['timeout'] ?? $this->timeout;
        $this->async = $config['async'] ?? $this->async;
        $this->rules = $config['rules'] ?? $this->rules;
        $this->range = $config['range'] ?? $this->range;
        $this->accept = $config['accept'] ?? $this->accept;

        $ql = QueryList::getInstance();
        if ($this->async) {
            // The PhantomJS binary is bundled per platform under <root>/phantomjs/.
            $binary = $this->os === 'linux'
                ? 'phantomjs/linux/phantomjs'
                : 'phantomjs/windows/phantomjs.exe';
            $ql->use(PhantomJs::class, app()->getRootPath() . $binary);
        }
        $this->ql = $ql;
    }

    /**
     * Load $url into the QueryList instance and return the raw response body.
     *
     * @param string     $url    Address to fetch.
     * @param mixed|null $params Passed to QueryList's get()/post() as the
     *                           request arguments; ignored in async mode.
     *
     * @return mixed The HTML held by QueryList after the request; an empty
     *               string when a RequestException was caught or the
     *               configured method is not supported.
     */
    public function getWebsiteHtml($url, $params = null)
    {
        $headers = [
            'User-Agent' => "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.98 Safari/537.36",
            'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        ];
        $this->url = $url;
        $this->headers = $headers;

        $method = strtoupper((string) $this->method);
        $timeout = $this->timeout;

        // The QueryList instance is shared, so drop whatever a previous call
        // left behind; otherwise an unsupported method would return stale HTML.
        $this->ql->setHtml('');

        try {
            if ($this->async) {
                $this->ql->browser(function (\JonnyW\PhantomJs\Http\RequestInterface $r) use ($url, $headers, $method, $timeout) {
                    $r->setMethod($method);
                    $r->setUrl($url);
                    $r->setTimeout($timeout * 1000); // configured in seconds, passed on in milliseconds
                    $r->setHeaders($headers);
                    return $r;
                });
            } else {
                $options = [
                    'timeout' => $timeout,
                    'headers' => $headers,
                ];
                if ($method === 'GET') {
                    $this->ql->get($url, $params, $options);
                } elseif ($method === 'POST') {
                    $this->ql->post($url, $params, $options);
                }
            }
        } catch (RequestException $e) {
            // Best effort: a failed request yields an empty document.
            $this->ql->setHtml('');
        }

        return $this->ql->getHtml();
    }

    /**
     * Extract the item list from the document currently held by QueryList.
     *
     * @return mixed For accept = 'html', whatever QueryList's getData()
     *               returns for the configured rules and range. For
     *               accept = 'json', a list of arrays with the keys title,
     *               cover, url, time and extra. An empty array when there is
     *               no document or `accept` is neither 'html' nor 'json'.
     */
    public function getHtmlData()
    {
        $html = $this->ql->getHtml();
        if (!$html) {
            return [];
        }

        if ($this->accept === 'html') {
            return $this->ql->rules($this->rules)->range($this->range)->query()->getData();
        }

        if ($this->accept === 'json') {
            return $this->extractJsonRows((string) $html);
        }

        return [];
    }

    /**
     * Decode a JSON document, walk the `|`-separated range path and map every
     * row found there onto the fixed output fields.
     *
     * @param string $json Raw JSON text.
     *
     * @return array List of rows; empty when the JSON is invalid, the path
     *               does not exist or does not lead to an array.
     */
    private function extractJsonRows($json)
    {
        $node = json_decode($json, true);

        foreach (explode('|', (string) $this->range) as $segment) {
            if ($segment === '') {
                // An empty range (or empty segment) means "stay where we are".
                continue;
            }
            if (!is_array($node) || !isset($node[$segment])) {
                // Path not present (e.g. an error payload): no rows rather
                // than rows built from unrelated parts of the document.
                return [];
            }
            $node = $node[$segment];
        }

        if (!is_array($node)) {
            return [];
        }

        // Output field => value used when the row has no such key.
        $defaults = [
            'title' => '',
            'cover' => '',
            'url' => 'javascript:void(0);',
            'time' => '',
            'extra' => '',
        ];

        $rows = [];
        foreach ($node as $row) {
            if (!is_array($row)) {
                continue;
            }
            $item = [];
            foreach ($defaults as $field => $default) {
                $key = $this->rules[$field] ?? null;
                $usable = is_string($key) || is_int($key);
                $item[$field] = ($usable && isset($row[$key])) ? $row[$key] : $default;
            }
            $rows[] = $item;
        }

        return $rows;
    }
}